Experimental BBS Explossion 3

home *** CD-ROM | disk | FTP | other *** search

/ Experimental BBS Explossion 3 / Experimental BBS Explossion III.iso / graphics / flick_12.zip / CHUNKY8.S < prev next >

Wrap

Text File | 1994-02-18 | 9KB | 451 lines

;
;               Flick FLI-format Animation Viewer v1.2          19 Feb 1994
;               --------------------------------------
;
;
;This program plays FLI/FLC-format bitmapped animation files on any ECS
;or AGA Amiga running OS2.04 or higher.  FLI/FLC-format files are
;produced by Autodesk Animator and Autodesk 3D Studio on a PC, as well
;as by other programs.
;
;The files in this archive may be distributed anywhere provided they are
;unmodified and are not sold for profit.
;
;Ownership and copyright of all files remains with the author:
;
;    Peter McGavin, 86 Totara Crescent, Lower Hutt, New Zealand.
;    e-mail: peterm@maths.grace.cri.nz
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;               xdef    _chunky2planar

; Basically the same as peterm/chunky6.s, except unwind loops
; as much as possible.  This is much better optimised for a 68040,
; but inefficient on lower processors.

;-----------------------------------------------------------------------------
; Set Macro68 defaults

                default _branch,_word
                default _adrbasedisp,_word
                default _pcbasedisp,_word
                default _outerdisp,_word
                default _absolute,_pcrel

;-----------------------------------------------------------------------------
; chunky2planar:        (new Motorola syntax)
;
; Syntax: Macro68, "new" Motorola 68020+ operand syntax.
;
; Assembly-time symbols expected from the build:
;   depth    number of bitplanes (8, 6 or 4; 4 only when generic is defined)
;   generic  if defined, the plane size is passed at run time in d1
;   width, height   (only when generic is NOT defined) fixed image size
;
; In:
;   a0 -> chunky pixels (one byte per pixel)
;   a1 -> plane0 (assume the other planes are allocated contiguously,
;         plsiz bytes apart)
;   d1 =  plsiz, the size of one plane in bytes, i.e. width*height/8
;         (only if generic is defined).  The code below pushes d1 as
;         "plsiz", steps between planes with it and derives the loop
;         count as d1/4-1.
;
; Preserves d2-d7/a2-a6 (saved/restored with movem).  d0, d1, a0 and a1 are
; modified.
;
; Preconditions visible from the code:
;   * plsiz/4-1 is kept in a 16-bit counter and the loop exits when it
;     reaches exactly zero, so the image must contain at least 64 pixels
;     and at most 65536 groups of 32 pixels.
;   * On the first call the main loop is copied down onto a 16-byte
;     boundary inside the nop padding at begincode and exec's CacheClearU
;     is called; the code is therefore self-modifying and the branches
;     inside mainloop..endcode must stay PC-relative.
;
; Stack buffer layout (64 bytes, 16-byte aligned, a3 -> start):
;   a3, a4, a5, a6 each address a 16-byte sub-buffer for a pair of planes
;   (0/1, 2/3, 4/5, 6/7).  Within a sub-buffer the even plane uses bytes
;   0-3 and 4-7 alternately, the odd plane bytes 8-11 and 12-15, so one
;   longword can be written to Chip ram while the next is being built.

                ifnd    generic
plsiz           equ     width*height/8
                endc

_LVOCacheClearU equ     -636

                ifd     generic
                ifeq    depth-8
_c2p_8_040::
                else
                ifeq    depth-6
_c2p_6_040::
                else
                ifeq    depth-4
_c2p_4_040::
                endc
                endc
                endc
                else
                ifeq    depth-8
                ifeq    width-320
_c2p320x200x8_040::
                endc
                else
                ifeq    depth-6
                ifeq    width-320
_c2p320x200x6_040::
                endc
                else
                die     "Unrecognised resolution"
                endc
                endc
                endc

                movem.l d2-d7/a2-a6,-(sp)

                bset    #0,(firsttimeflag)
                bne.b   skip_relocate   ; branch if not being called 1st time

; relocate the mainloop to a quad-longword boundary (for 030/040 cache line)
; The destination is begincode rounded up to a multiple of 16, which lies
; 2..16 bytes below mainloop, so a forward word copy is safe.
                lea     (begincode,pc),a2
                adda.w  #15,a2
                move.l  a2,d0
                and.w   #~15,d0
                movea.l d0,a2
                lea     (mainloop,pc),a3
                move.w  #(endcode-mainloop)/2-1,d0
1$:             move.w  (a3)+,(a2)+
                dbra    d0,1$

; flush the caches
                movem.l a0/a1/d1,-(sp)
                movea.l (4).w,a6        ; a6 = ExecBase (long at absolute address 4)
                jsr     (_LVOCacheClearU,a6)
                movem.l (sp)+,a0/a1/d1

skip_relocate:
                move.w  sp,d0
                and.w   #15,d0
                add.w   #64,d0          ; make room on stack for
                suba.w  d0,sp           ; 64-byte quad-longword aligned buffer
                movea.l sp,a3           ; pointed to by a3
                move.w  d0,-(sp)        ; and save the allocated size

                ifd     generic
                move.l  d1,-(sp)        ; plsiz on stack at (6,sp)
                move.l  d1,d0
                ifgt    depth-4
                lsl.l   #3,d0
                else
                lsl.l   #2,d0
                endc
                sub.l   d1,d0
                move.l  d0,-(sp)        ; 7*plsiz or 3*plsiz on stack at (2,sp)
                lsr.l   #2,d1
                subq.l  #1,d1
                move.w  d1,-(sp)        ; outer loop counter on stack at (sp)
                else
                move.w  #plsiz/4-1,-(sp)        ; outer loop counter on stack at (sp)
                endc

; set up register constants
                move.l  #$0f0f0f0f,d5   ; d5 = constant $0f0f0f0f
                move.l  #$55555555,d6   ; d6 = constant $55555555
                move.l  #$3333cccc,d7   ; d7 = constant $3333cccc

; load up address registers with buffer ptrs
                lea     (4*4,a3),a4     ; a4 -> plane2buf
                ifgt    depth-4
                lea     (4*4,a4),a5     ; a5 -> plane4buf
                lea     (4*4,a5),a6     ; a6 -> plane6buf
                endc

; Macros part1 and part2 together convert 8 pixels from chunky to stack buffers
; part1 reads 8 bytes from (a0)+ and leaves intermediate results in d0-d3;
; part2 finishes the bit shuffling and stores one byte per plane, advancing
; a3/a4 (and a5, a6 when those planes exist) by one.  Nothing placed between
; a part1 and its part2 may touch d0-d4.
; The leading number in each comment is the author's cycle count.

part1           macro
                move.l  (a0)+,d2        ; 12 get next 4 chunky pixels in d2
                move.l  (a0)+,d3        ; 12 get next 4 chunky pixels in d3
                ifgt    depth-4
                move.l  d2,d0           ;  4
                and.l   d5,d2           ;  8 d5=$0f0f0f0f
                move.l  d3,d1           ;  4
                and.l   d5,d3           ;  8 d5=$0f0f0f0f
                eor.l   d2,d0           ;  8
                eor.l   d3,d1           ;  8
                lsr.l   #4,d1           ; 16
                or.l    d1,d0           ;  8
                endc
                lsl.l   #4,d2           ; 16
                or.l    d3,d2           ;  8
                move.l  d2,d3           ;  4
                and.l   d7,d3           ;  8 d7=$3333cccc
                eor.l   d3,d2           ;  8
                lsr.w   #2,d3           ; 10
                swap    d3              ;  4
                lsl.w   #2,d3           ; 10
                or.l    d2,d3           ;  8
                ifgt    depth-4
                move.l  d0,d1           ;  4
                and.l   d7,d1           ;  8 d7=$3333cccc
                eor.l   d1,d0           ;  8
                lsr.w   #2,d1           ; 10
                swap    d1              ;  4
                lsl.w   #2,d1           ; 10
                or.l    d0,d1           ;  8
                move.l  d1,d2           ;  4
                lsr.l   #7,d2           ; 22
                move.l  d1,d0           ;  4
                and.l   d6,d0           ;  8 d6=$55555555
                endc
                endm

part2           macro
                ifgt    depth-4
                eor.l   d0,d1           ;  8
                move.l  d2,d4           ;  4
                and.l   d6,d4           ;  8 d6=$55555555
                eor.l   d4,d2           ;  8
                or.l    d4,d1           ;  8
                lsr.l   #1,d1           ; 10
                move.b  d1,(8,a5)       ; 12 plane 5
                ifgt    depth-6
                swap    d1              ;  4
                move.b  d1,(8,a6)       ; 12 plane 7
                endc
                or.l    d0,d2           ;  8
                move.b  d2,(a5)+        ;  8 plane 4
                ifgt    depth-6
                swap    d2              ;  4
                move.b  d2,(a6)+        ;  8 plane 6
                endc
                endc
                move.l  d3,d2           ;  4
                lsr.l   #7,d2           ; 22
                move.l  d3,d0           ;  4
                and.l   d6,d0           ;  8 d6=$55555555
                eor.l   d0,d3           ;  8
                move.l  d2,d4           ;  4
                and.l   d6,d4           ;  8 d6=$55555555
                eor.l   d4,d2           ;  8
                or.l    d4,d3           ;  8
                lsr.l   #1,d3           ; 10
                move.b  d3,(8,a3)       ; 12 plane 1
                swap    d3              ;  4
                move.b  d3,(8,a4)       ; 12 plane 3
                or.l    d0,d2           ;  8
                move.b  d2,(a3)+        ;  8 plane 0
                swap    d2              ;  4
                move.b  d2,(a4)+        ;  8 plane 2
                endm

; optimised suba
; Steps a1 back by one plane (plsiz bytes), using the shortest encoding.

subao           macro
                ifd     generic
                suba.l  (6,sp),a1
                else
                iflt    plsiz-32768
                suba.w  #plsiz,a1
                else
                suba.l  #plsiz,a1
                endc
                endc
                endm

; convert the first 32 pixels to stack buffers as a special case
; (afterwards a3/a4/a5/a6 are at sub-buffer base+4)

                rept    4
                part1
                part2
                endr

begincode:
                rept    8               ; space for mainloop code relocation
                nop
                endr

; main loop (starts here) processes 64 chunky pixels at a time
; On entry the buffer pointers are at base+4 and the previous 32 pixels sit
; at base+0 (even planes) / base+8 (odd planes).

mainloop:

; Process the next 32 pixels from chunky to stack buffers while at the same
; time moving the result of the previous 32 pixels from stack buffers to
; Chip ram planes.

; Chip writes are spaced as widely apart as possible, so that there is
; always something useful happening while waiting for the Chip bus.

                ifgt    depth-4
                ifd     generic
                adda.l  (2,sp),a1       ; add 7*plsiz
                else
                adda.l  #7*plsiz,a1     ; a1 moves from plane 0 to plane 7
                endc
                ifgt    depth-6
                move.l  (4,a6),(a1)     ; plane 7
                endc
                part1
                subao
                ifgt    depth-6
                move.l  (-4,a6),(a1)    ; plane 6
                endc
                part2
                subao
                move.l  (3,a5),(a1)     ; plane 5
                part1
                subao
                move.l  (-5,a5),(a1)    ; plane 4
                part2
                subao
                move.l  (2,a4),(a1)     ; plane 3
                part1
                subao
                move.l  (-6,a4),(a1)    ; plane 2
                part2
                subao
                move.l  (1,a3),(a1)     ; plane 1
                part1
                subao
                move.l  (-7,a3),(a1)+   ; plane 0
                part2
                else
                ifd     generic
                adda.l  (2,sp),a1       ; add 3*plsiz
                else
                adda.l  #3*plsiz,a1     ; a1 moves from plane 0 to plane 3
                endc
                move.l  (4,a4),(a1)     ; plane 3
                part1
                part2
                subao
                move.l  (-5,a4),(a1)    ; plane 2
                part1
                part2
                subao
                move.l  (2,a3),(a1)     ; plane 1
                part1
                part2
                subao
                move.l  (-7,a3),(a1)+   ; plane 0
                part1
                part2
                endc

; check if finished
; (exit here leaves the buffer pointers at base+8 with the newest data at
; base+4 / base+12, which is what the offsets at "done" expect)
                sub.w   #1,(sp)
                beq.w   done

; restore stack buffer pointers
                subq.l  #8,a3
                subq.l  #8,a4
                ifgt    depth-4
                subq.l  #8,a5
                subq.l  #8,a6
                endc

; Process the next 32 pixels from chunky to stack buffers while at the same
; time moving the result of the previous 32 pixels from stack buffers to
; Chip ram planes.
; (pointers start at base+0 here; previous data is at base+4 / base+12)

                ifgt    depth-4
                ifd     generic
                adda.l  (2,sp),a1       ; add 7*plsiz
                else
                adda.l  #7*plsiz,a1     ; a1 moves from plane 0 to plane 7
                endc
                ifgt    depth-6
                move.l  (12,a6),(a1)    ; plane 7
                endc
                part1
                subao
                ifgt    depth-6
                move.l  (4,a6),(a1)     ; plane 6
                endc
                part2
                subao
                move.l  (11,a5),(a1)    ; plane 5
                part1
                subao
                move.l  (3,a5),(a1)     ; plane 4
                part2
                subao
                move.l  (10,a4),(a1)    ; plane 3
                part1
                subao
                move.l  (2,a4),(a1)     ; plane 2
                part2
                subao
                move.l  (9,a3),(a1)     ; plane 1
                part1
                subao
                move.l  (1,a3),(a1)+    ; plane 0
                part2
                else
                ifd     generic
                adda.l  (2,sp),a1       ; add 3*plsiz
                else
                adda.l  #3*plsiz,a1     ; a1 moves from plane 0 to plane 3
                endc
                move.l  (12,a4),(a1)    ; plane 3
                part1
                part2
                subao
                move.l  (3,a4),(a1)     ; plane 2
                part1
                part2
                subao
                move.l  (10,a3),(a1)    ; plane 1
                part1
                part2
                subao
                move.l  (1,a3),(a1)+    ; plane 0
                part1
                part2
                endc

; check if finished, go back for more
                sub.w   #1,(sp)
                bne.w   mainloop

; No pointer correction is needed when falling through here.  The second
; half started with the buffer pointers at base+0 and advanced them by 4,
; so they are now at base+4 and the newest data is at base+0 (even planes)
; and base+8 (odd planes) -- exactly what the (-4,aN) / (4,aN) reads at
; "done" address.  (Adding 4 to a3-a6 at this point would make "done"
; re-write the previous 32 pixels instead of the final ones whenever the
; image holds an odd number of 32-pixel groups.)

; write the last longword from stack buffer to planes
done:
                ifd     generic
                adda.l  (2,sp),a1       ; add 7*plsiz or 3*plsiz
                ifgt    depth-4
                ifgt    depth-6
                move.l  (4,a6),(a1)     ; plane 7
                endc
                subao
                ifgt    depth-6
                move.l  (-4,a6),(a1)    ; plane 6
                endc
                subao
                endc
                else
                adda.l  #(depth-1)*plsiz,a1     ; a1 moves from plane 0 to the top plane
                ifgt    depth-6
                move.l  (4,a6),(a1)     ; plane 7
                subao
                move.l  (-4,a6),(a1)    ; plane 6
                subao
                endc
                endc
                ifgt    depth-4
                move.l  (4,a5),(a1)     ; plane 5
                subao
                move.l  (-4,a5),(a1)    ; plane 4
                subao
                endc
                move.l  (4,a4),(a1)     ; plane 3
                subao
                move.l  (-4,a4),(a1)    ; plane 2
                subao
                move.l  (4,a3),(a1)     ; plane 1
                subao
                move.l  (-4,a3),(a1)+   ; plane 0

; all done!  restore stack and return
                ifd     generic
                adda.w  #4+4+2,sp       ; remove stack variables
                else
                addq.w  #2,sp           ; remove outer loop counter
                endc
                adda.w  (sp)+,sp        ; remove aligned 64-byte buffer (+ alignment padding)
                movem.l (sp)+,d2-d7/a2-a6
                rts

endcode:

; bit 0 is set by the first call, once the main loop has been relocated
firsttimeflag:  dc.b    0
                even

;-----------------------------------------------------------------------------

                end